Provide a leaflet map of the highest severity fires (i.e. subset to the highest category in HIGHEST_LEVEL_DESC) contained in the file buiding_fires.csv. Ignore locations that fall outside the five boroughs of New York City. Provide at least three pieces of information on the incident in a popup.
library(leaflet)
library(RColorBrewer)
borough <- c("1 - Manhattan", "2 - Bronx", "3 - Staten Island", "4 - Brooklyn", "5 - Queens") #Select only cases inside the 5 boroughs
fire_subset <- fire %>%
filter(HIGHEST_LEVEL_DESC == '75 - All Hands Working' & BOROUGH_DESC %in% borough) #Subsetting the dataframe fire on 75 - All Hands Working
pal = colorFactor("Set1", domain = fire_subset$BOROUGH_DESC) # Grab a palette, a color per borough
color_borough = pal(fire_subset$BOROUGH_DESC)
content <- paste("When:",fire_subset$INCIDENT_DATE_TIME,"<br/>",
"Address:",fire_subset$address,"<br/>",
"Property Type:",fire_subset$PROPERTY_USE_DESC,"<br/>")
m <- leaflet(fire_subset) %>% # Create a map widget
addTiles('http://{s}.basemaps.cartocdn.com/light_all/{z}/{x}/{y}.png') %>%
setView(-73.9949344, 40.7179112, zoom = 11)
m %>% addCircleMarkers(color = color_borough, weight = 4, popup = content, radius = 1) %>%
addLegend(pal = pal, values = ~fire_subset$BOROUGH_DESC, title = "Boroughs")
Start with the previous map. Now, distinguish the markers of the fire locations by PROPERTY_USE_DESC, i.e. what kind of property was affected. If there are too many categories, collapse some categories. Choose an appropriate coloring scheme to map the locations by type of affected property. Add a legend informing the user about the color scheme. Also make sure that the information about the type of affected property is now contained in the popup information. Show this map.
pal2 = colorFactor("Set2", domain = fire_subset$PROPERTY_USE_DESC) #Different color for each property type
color_prop = pal2(fire_subset$PROPERTY_USE_DESC)
content2 <- paste("When:",fire_subset$INCIDENT_DATE_TIME,"<br/>",
"Address:",fire_subset$address,"<br/>",
"Borough:", fire_subset$BOROUGH_DESC, "<br/>",
"Property Type:",fire_subset$PROPERTY_USE_DESC,"<br/>")
m %>% addCircleMarkers(color = color_prop, weight = 4, popup = content2, opacity = 1, radius = 1) %>%
addLegend("topleft", pal = pal2, values = ~fire_subset$PROPERTY_USE_DESC, title = htmltools::HTML("Property Type"), opacity = 1)
Add marker clustering, so that zooming in will reveal the individual locations but the zoomed out map only shows the clusters. Show the map with clusters.
m %>% addCircleMarkers(color = color_prop,
popup = content2,
clusterOptions = markerClusterOptions())
The second data file contains the locations of the 218 firehouses in New York City. Start with the non-clustered map (2b) and now adjust the size of the circle markers by severity (TOTAL_INCIDENT_DURATION or UNITS_ONSCENE seem plausible options). More severe incidents should have larger circles on the map. On the map, also add the locations of the fire houses. Add two layers (“Incidents”, “Firehouses”) that allow the user to select which information to show.
content_firehouse <- paste("Firehouse Name:",firehouses$FacilityName,"<br/>",
"Address:",firehouses$FacilityAddress,"<br/>")
#icons for firehouses
firehouseIcons <- icons(
iconUrl = "./icons/pin-sharp.svg",
iconWidth = 15, iconHeight = 15,
iconAnchorX = 7.5, iconAnchorY = 8.5
)
m_l1 = leaflet() %>%
setView(-73.9949344, 40.7179112, zoom = 15) %>%
# Base groups = Background layer
addTiles('http://{s}.basemaps.cartocdn.com/light_all/{z}/{x}/{y}.png', group = "base") %>%
# Data Layers
## First Data Layer: Incidents
addCircleMarkers(data = fire_subset, color = color_prop, group = "Incidents",
popup = content2,
clusterOptions = markerClusterOptions(),
opacity = 0.9,
fillOpacity = 0.9,
radius = fire_subset$UNITS_ONSCENE) %>%
## Second Data Layer: Firehouses
addMarkers(data = firehouses,
icon = firehouseIcons,
group="Firehouses",
popup = content_firehouse) %>%
addLayersControl(
baseGroups = c("base"),
overlayGroups = c("Incidents","Firehouses"),
options = layersControlOptions(collapsed = TRUE))
m_l1
We now want to investigate whether the distance of the incident from the nearest firehouse varies across the city.
For all incident locations (independent of severity), identify the nearest firehouse and calculate the distance between the firehouse and the incident location. Provide a scatter plot showing the time until the first engine arrived (the variables INCIDENT_DATE_TIME and ARRIVAL_DATE_TIME) will be helpful.
library(geosphere)
#For each fire incident
for(i in 1:nrow(fire)){
#row number of the nearest firestation
r <- apply(distm(fire[,c(26,27)][i,], firehouses[,c(6,5)], fun = distHaversine), 1, which.min)
#distance of the nearest firestation
dist_min <- apply(distm(fire[,c(26,27)][i,], firehouses[,c(6,5)], fun = distHaversine), 1, min)
#column to store nearest firestation name
fire$NEAREST_STATION[i] <- firehouses$FacilityName[r]
fire$DISTANCE[i] <- dist_min
}
library(lubridate)
#time_taken_to_arrive is in seconds
fire$TIME_TAKEN_TO_ARRIVE = parse_date_time(fire$ARRIVAL_DATE_TIME, "%m/%d/%Y %H:%M:%S %p", tz = "EST") -
parse_date_time(fire$INCIDENT_DATE_TIME, "%m/%d/%Y %H:%M:%S %p", tz = "EST")
#Converting time in minutes
fire$TIME_TAKEN_TO_ARRIVE <- round(fire$TIME_TAKEN_TO_ARRIVE/60, digits = 2)
#Converting distance into kilometers
fire$DISTANCE <- round(fire$DISTANCE/1000, digits = 2)
Note :Arrival date time is missing for 21 incidents.
library(ggplot2)
library(ggthemes)
plot1 <- ggplot(data = fire, aes(x = TIME_TAKEN_TO_ARRIVE, y = DISTANCE)) +
geom_point() +
theme_tufte() +
labs(title = "Mapping Fire Incidents and FDNY Response Times",
subtitle = "Relation between arrival time of fire engines and distance from a firehouse",
caption = "source: NYC data") +
scale_x_discrete(name ="Arrival time of the first fire engine (in min)",
limits=c(1,5,10,15,20,25,30,35,40, 50, 60, 70, 80, 90, 100)) +
ylab("Distance to the nearest firehouse (in km)")
plot1
Note :There are multiple outliers which are making it difficult to see the trend.
Removing the outliers for a better graph.
fire_better <- fire %>%
filter(DISTANCE < 25 &
TIME_TAKEN_TO_ARRIVE < 25 &
TIME_TAKEN_TO_ARRIVE > 0)
plot2 <- ggplot(data = fire_better, aes(x = TIME_TAKEN_TO_ARRIVE, y = DISTANCE)) +
geom_point() +
geom_smooth(method = "lm", se = FALSE) +
theme_tufte() +
labs(title = "Mapping Fire Incidents and FDNY Response Times",
subtitle = "Relation between arrival time of fire engines and distance from a firehouse",
caption = "source: NYC data") +
scale_x_discrete(name ="Arrival time of the first fire engine (in min)",
limits=c(1,5,10,15,20,25)) +
ylab("Distance to the nearest firehouse (in km)")
plot2
Now also visualize the patterns separately for severe and non-severe incidents (use HIGHEST_LEVEL_DESC but feel free to reduce the number of categories). What do you find?
plot3 <- ggplot(data = fire_better, aes(x = TIME_TAKEN_TO_ARRIVE, y = DISTANCE)) +
geom_point(aes(color = COMBINED_LEVEL_DESC)) +
facet_wrap(COMBINED_LEVEL_DESC ~ ., nrow = 2) +
geom_smooth(method = "lm", se = FALSE) +
labs(title = "Mapping Fire Incidents and FDNY Response Times",
subtitle = "Relation between arrival time of fire engines and distance from a firehouse",
caption = "source: NYC data") +
scale_x_discrete(name ="Arrival time of the first fire engine (in min)",
limits=c(1,5,10,15,20,25)) +
ylab("Distance to the nearest firehouse (in km)") +
scale_color_discrete(name = "Incident Severity")
plot3
fire_better <- fire_better %>%
filter(!is.na(fire_better$COMBINED_LEVEL_DESC))
plot4 <- ggplot(data = fire_better, aes(x = TIME_TAKEN_TO_ARRIVE,
y = DISTANCE,
color = COMBINED_LEVEL_DESC)) +
geom_smooth(method = "lm",
se = FALSE) +
labs(title = "Mapping Fire Incidents and FDNY Response Times",
subtitle = "Relation between arrival time of fire engines and distance from a firehouse",
caption = "source: NYC data") +
scale_x_discrete(name ="Arrival time of the first fire engine (in min)",
limits=c(1,5,10,15,20)) +
ylab("Distance to the nearest firehouse (in km)") +
scale_color_discrete(name = "Incident Severity")
plot4
The slopes help compare if the incident severity matters in the relation between arrival time and the incident distance.
Second Alarm incidences have the highest slope but there are lesser number of incidences for second alarm.
Highest alarm incidences have a higher slope than first alarm incidences. This implies that the first fire engine reaches similar distance faster if the incidence is of highest severity compared to first alarm severity.
Provide a map visualization of response times. Investigate whether the type of property affected (PROPERTY_USE_DESC) or fire severity (HIGHEST_LEVEL_DESC) play a role here.
#Select only cases inside the 5 boroughs and only required columns
fire_subset1 <- fire %>%
filter(BOROUGH_DESC %in% borough) %>%
select(TIME_TAKEN_TO_ARRIVE, PROPERTY_USE_DESC, HIGHEST_LEVEL_DESC, lon, lat)
#icons
pinIcons <- icons(
iconUrl = "./icons/pin-sharp.svg",
iconWidth = 15, iconHeight = 15,
iconAnchorX = 7.5, iconAnchorY = 8.5
)
pal3 = colorFactor("RdYlGn", domain = factor(fire_subset1$TIME_TAKEN_TO_ARRIVE), reverse = TRUE) # Grab a palette
content_prop <- paste("Property Type:",fire_subset1$PROPERTY_USE_DESC,"<br/>",
"Arrival Time (in mins):", fire_subset1$TIME_TAKEN_TO_ARRIVE, "<br/>")
content_sev <- paste("Severity:", fire_subset1$HIGHEST_LEVEL_DESC, "<br/>",
"Arrival Time (in mins):", fire_subset1$TIME_TAKEN_TO_ARRIVE, "<br/>")
leaflet() %>%
setView(-73.9949344, 40.7179112, zoom = 15) %>%
addTiles('http://{s}.basemaps.cartocdn.com/light_all/{z}/{x}/{y}.png') %>%
addMarkers(data = fire_subset1,
group = "Property Type",
lng = fire_subset1$lon,
lat = fire_subset1$lat,
popup = content_prop,
icon = pinIcons
) %>%
addCircleMarkers(color = pal3(fire_subset1$TIME_TAKEN_TO_ARRIVE),
lng = fire_subset1$lon,
lat = fire_subset1$lat,
opacity = 0.3,
fillOpacity = 0.3,
radius = fire_subset1$TIME_TAKEN_TO_ARRIVE)
leaflet() %>%
setView(-73.9949344, 40.7179112, zoom = 15) %>%
addTiles('http://{s}.basemaps.cartocdn.com/light_all/{z}/{x}/{y}.png') %>%
#Data Layers
addCircleMarkers(data = fire_subset1,
lng = fire_subset1$lon,
lat = fire_subset1$lat,
popup = content_sev,
color = pal3(fire_subset1$TIME_TAKEN_TO_ARRIVE),
radius = ~ifelse(fire_subset1$HIGHEST_LEVEL_DESC == "1- First Alarm" | HIGHEST_LEVEL_DESC == "0 - Initial alarm", 6, ifelse(fire_subset1$HIGHEST_LEVEL_DESC == "2- Second Alarm" | fire_subset1$HIGHEST_LEVEL_DESC == "3- Third Alarm", 10, ifelse(fire_subset1$HIGHEST_LEVEL_DESC == "4- Fourth Alarm" | fire_subset1$HIGHEST_LEVEL_DESC == "5- Fifth Alarm", 14, 18))),
fillOpacity = 0.8)
#Creating the required summarised dataframe
df_fire_summary <- fire %>%
select(INCIDENT_DATE_TIME, BOROUGH_DESC, TIME_TAKEN_TO_ARRIVE)
df_fire_summary$INCIDENT_YEAR <- year(parse_date_time(fire$INCIDENT_DATE_TIME, "%m/%d/%Y %H:%M:%S %p", tz = "EST"))
drops <- c("INCIDENT_DATE_TIME")
df_fire_summary <- df_fire_summary[, !(names(df_fire_summary) %in% drops)]
df_fire_summary1 <- df_fire_summary %>%
dplyr::group_by(INCIDENT_YEAR, BOROUGH_DESC) %>%
dplyr::summarise(MEAN_RESPONSE_TIME = round(mean(TIME_TAKEN_TO_ARRIVE, na.rm = TRUE), digits = 2))
#Splitting the dataframe by year
df_splitted_by_year <- df_fire_summary1 %>%
split(.$INCIDENT_YEAR)
#creating a copy of the spatial data frame to join the summary time data of all the years
nyc_all <- nyc
yr <- 2013
for(tib in df_splitted_by_year){
nyc_all <- merge(nyc_all, tib, by.x = "boro_code", by.y = "BOROUGH_DESC")
names(nyc_all)[names(nyc_all) == 'INCIDENT_YEAR'] <- paste("YEAR_", yr, sep = "")
names(nyc_all)[names(nyc_all) == 'MEAN_RESPONSE_TIME'] <- paste("MEAN_RESPONSE_TIME_", yr, sep = "")
yr = yr + 1
}
Show a faceted choropleth map indicating how response times have developed over the years. What do you find?
layout <- tm_layout(
legend.title.size = 1,
legend.text.size = 0.6,
legend.position = c(0.8,0),
legend.bg.color = "white",
legend.bg.alpha = 1,
bg.color="white",
frame=FALSE)
tm_shape(nyc_all) +
layout +
tm_polygons(c("MEAN_RESPONSE_TIME_2013", "MEAN_RESPONSE_TIME_2014", "MEAN_RESPONSE_TIME_2015", "MEAN_RESPONSE_TIME_2016", "MEAN_RESPONSE_TIME_2017", "MEAN_RESPONSE_TIME_2018"),
style=c("pretty"),
palette=c("YlOrBr"),
auto.palette.mapping=FALSE,
title=c("Avg Response Time 2013", "Avg Response Time 2014", "Avg Response Time 2015", "Avg Response Time 2016", "Avg Response Time 2017", "Avg Response Time 2018")) +
tm_style_white() +
tm_text("boro_name", size=.6, shadow=TRUE,
bg.color="white", bg.alpha=.25,
remove.overlap=TRUE)